num genre
1 125 Horror
2 110 Thrillers
3 38 Comedies
4 19 SciFi&Fantasy
5 12 Cult
6 2 Documentaties
7 2 Romantic
December 9, 2024
# Normal-theory prediction interval for a single future observation.
#
# Arguments:
#   data          - numeric vector of observations (at least two values, so
#                   that the sample standard deviation is defined).
#   coverage_prob - intended coverage probability, a single number in [0, 1].
#
# Returns a one-row data.frame with columns PI_percentage (the requested
# coverage), lower and upper (the interval bounds).
#
# Invalid input raises an error instead of silently producing NaN bounds.
PI <- function(data, coverage_prob) {
  stopifnot(
    "data must be a numeric vector with at least two values" =
      is.numeric(data) && length(data) >= 2,
    "coverage_prob must be a single number between 0 and 1" =
      is.numeric(coverage_prob) && length(coverage_prob) == 1 &&
        !is.na(coverage_prob) && coverage_prob >= 0 && coverage_prob <= 1
  )

  n <- length(data)
  tail_prob <- (1 - coverage_prob) / 2
  # Lower and upper t quantiles with n - 1 degrees of freedom.
  t_scores <- qt(c(tail_prob, tail_prob + coverage_prob), df = n - 1)
  # sqrt(1 + 1/n) widens the interval to account for the uncertainty in the
  # sample mean in addition to the spread of a new observation.
  bounds <- mean(data) + t_scores * sd(data) * sqrt(1 + (1 / n))

  data.frame(
    PI_percentage = coverage_prob,
    lower = bounds[1],
    upper = bounds[2]
  )
}

# Runs one simulation: draws n values from Beta(alpha, beta), builds a normal
# prediction interval with intended coverage pi_prop via PI(), and measures
# how well that interval fits the parent distribution.
#
# Returns a one-row data.frame with the PI() columns plus:
#   cover            - proportion of the Beta(alpha, beta) distribution that
#                      lies inside the interval (its actual coverage).
#   alpha, beta      - the shape parameters used.
#   mean_in_interval - TRUE when the distribution mean lies in the interval.
one_beta_simulation <- function(n, alpha, beta, pi_prop) {
  cover_df <- PI(rbeta(n, alpha, beta), pi_prop)
  lower <- cover_df[["lower"]]
  upper <- cover_df[["upper"]]

  cover_prop <- pbeta(upper, alpha, beta) - pbeta(lower, alpha, beta)

  # Mean of Beta(alpha, beta); equals 0.5 only when alpha == beta, so it is
  # computed rather than hard-coded.
  true_mean <- alpha / (alpha + beta)
  mean_in_interval <- true_mean >= lower && true_mean <= upper

  param_df <- data.frame(
    cover = cover_prop,
    alpha = alpha,
    beta = beta,
    mean_in_interval = mean_in_interval
  )
  cbind(cover_df, param_df)
}

# Applies one_beta_simulation() with alpha = beta = v for every v in
# shape_values, using samples of size n, and stacks the results.
#
# Arguments:
#   n            - sample size used for every simulation.
#   pi_prop      - intended coverage probability passed on to PI().
#   shape_values - vector of alpha = beta values; defaults to the variable
#                  `parameters`, which must exist in the calling environment
#                  chain when the default is used.
#
# Returns a data.frame with one row per shape value: column n followed by the
# columns of one_beta_simulation().
#
# NOTE(review): the original passed the built-in constant `pi` (3.141593) as
# the coverage probability, which is not a valid probability and produced NaN
# bounds. The 0.95 default is an assumed value -- confirm the intended
# coverage.
beta_sims_n <- function(n, pi_prop = 0.95, shape_values = parameters) {
  sims <- map(
    shape_values,
    \(param) one_beta_simulation(n, param, param, pi_prop)
  ) |>
    list_rbind()

  cbind(data.frame(n = rep(n, nrow(sims))), sims)
}
# n PI_percentage lower upper cover alpha beta mean_in_interval
1 164 3.141593 NaN NaN NaN 123 123 NA
2 67 3.141593 NaN NaN NaN 17 17 NA
3 78 3.141593 NaN NaN NaN 113 113 NA
4 409 3.141593 NaN NaN NaN 195 195 NA
5 337 3.141593 NaN NaN NaN 67 67 NA
6 31 3.141593 NaN NaN NaN 151 151 NA
7 332 3.141593 NaN NaN NaN 65 65 NA
8 200 3.141593 NaN NaN NaN 187 187 NA
9 88 3.141593 NaN NaN NaN 91 91 NA
10 456 3.141593 NaN NaN NaN 55 55 NA